Authored: Kathy N. Lam

Experiment: November 20, 2019

Updated: April 16, 2021

Set up

library(flowCore) #for reading and manipulating flow data
library(ggcyto) #for using ggplot with flow data
library(Phenoflow) #for rarefying
library(flexmix) #to identify peaks in distributions including multimodal
library(tidyverse) #for general data wrangling and plotting

Read data

(fs = read.flowSet(path="data_fcs"))
## A flowSet with 6 experiments.
## 
##   column names:
##   530/30 Blue B-A 610/20 YG C-A FSC-A FSC-H FSC-W SSC-A SSC-H SSC-W Time
colnames(fs) 
## [1] "530/30 Blue B-A" "610/20 YG C-A"   "FSC-A"           "FSC-H"          
## [5] "FSC-W"           "SSC-A"           "SSC-H"           "SSC-W"          
## [9] "Time"
flowCore::pData(phenoData(fs))
#read in sample names and merge
(metadata = read_tsv("metadata.tsv") %>%
    rename(name=Filename) %>%
    mutate(sample=paste(Phage, Replicate)) %>%
    mutate(Replicate_Full = paste("Replicate", Replicate)) %>%
    mutate(Treatment_Full = gsub("NT", "Non-Targeting", Treatment)) %>%
    mutate(Treatment_Full = gsub("GFPT", "GFP-Targeting", Treatment_Full)) 
)
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   Filename = col_character(),
##   Strain = col_character(),
##   Phage = col_character(),
##   Replicate = col_double(),
##   Timepoint = col_character(),
##   Treatment = col_character()
## )
#add columns to phenoData  
phenoData(fs)$Order = seq(1, length(phenoData(fs)$name))
phenoData(fs)$Phage = metadata$Phage
phenoData(fs)$Replicate = metadata$Replicate
phenoData(fs)$Replicate_Full = metadata$Replicate_Full
phenoData(fs)$Name = metadata$name
phenoData(fs)$Treatment = metadata$Treatment
phenoData(fs)$Treatment_Full = metadata$Treatment_Full

phenoData(fs)$Phage = factor(phenoData(fs)$Phage, levels=unique(phenoData(fs)$Phage))
phenoData(fs)$Treatment = factor(phenoData(fs)$Treatment, levels=unique(phenoData(fs)$Treatment))
phenoData(fs)$Treatment_Full = factor(phenoData(fs)$Treatment_Full, levels=unique(phenoData(fs)$Treatment_Full))
pData(phenoData(fs))
#make labeller function for plot facetting 
order = as.character(phenoData(fs)$Order)
name = phenoData(fs)$Name

order_names = mapply(c, order, name, SIMPLIFY = FALSE) #make a one-to-one 
order_names = lapply(order_names, `[[`, 2) #keep second element of each vector in the list
order_names = order_names[as.character(sort(as.numeric(names(order_names))))] #numerically sort 

order_labeller = function(variable,value){
  return(order_names[value])
}

Gate on scatter

scatter = rbind(c(0,     25000), 
                c(0,     160000),
                c(15000, 160000),
                c(15000, 25000))
colnames(scatter)=c("FSC-A", "SSC-A")
scatter = as.data.frame(scatter)

ggplot() + 
    geom_point(data=fs, aes(x=`FSC-A`, y=`SSC-A`), shape=16, size=0.75, alpha=0.5) +
    geom_polygon(data=scatter, aes(x=`FSC-A`, y=`SSC-A`), fill=NA, colour="indianred", size=0.75,  linetype="solid") +     
    scale_y_continuous(name="SSC-A (Granularity)\n", limits = c(-2e1,3e5)) +
    scale_x_continuous(name="\nFSC-A (Size)", limit=c(-2e1, 5e4)) +
    facet_grid(Treatment_Full~Replicate_Full) +
    theme_linedraw(14) +
    theme(panel.grid = element_blank()) 
## Warning: Removed 616 rows containing missing values (geom_point).

gate_scatter = flowCore::polygonGate(filterId="scatter", `FSC-A` = scatter$`FSC-A`, `SSC-A` = scatter$`SSC-A`) 
result = flowCore::filter(fs, gate_scatter)
events = flowCore::Subset(fs, result)

Rarefy

events = Phenoflow::FCS_resample(events, replace = FALSE, rarefy = FALSE, progress = TRUE)
## Your samples range between 10062 and 10403 cells
## Your samples were randomly subsampled to 10062 cells

Gate on fluoresence

red = rbind(
    c(-500,   200),
    c(-500,   4e4),
    c( 4000,  4e4),
    c( 1000,  200))
colnames(red)=c("530/30 Blue B-A", "610/20 YG C-A")
red = as.data.frame(red)

green = rbind(
    c(2000,   -500),
    c(2000,    800),
    c(300000,  800),
    c(300000, -500))
colnames(green)=c("530/30 Blue B-A", "610/20 YG C-A")
green = as.data.frame(green)

ggplot() + 
    geom_polygon(data=green, aes(x=`530/30 Blue B-A`, y=`610/20 YG C-A`), fill=NA, colour="green4", size=0.75, linetype="solid") +
    geom_polygon(data=red, aes(x=`530/30 Blue B-A`, y=`610/20 YG C-A`), fill=NA, colour="red3", size=0.75, linetype="solid") +
    geom_point(data=events, aes(x=`530/30 Blue B-A`, y=`610/20 YG C-A`), shape=16, size=1, alpha=0.2) +
    theme_linedraw(14) +
    ggcyto::scale_x_flowJo_biexp(name="GFP intensity", widthBasis=-500, pos=5, neg=0, limits=c(-500,3e5), breaks=c(0,1e3,1e4,1e5)) +
    ggcyto::scale_y_flowJo_biexp(name="mCherry intensity", widthBasis=-500, pos=5, neg=0, limits=c(-1000,1e5), breaks=c(0,1e3,1e4,1e5)) +
    theme(panel.grid = element_blank(), 
          axis.title.x = element_text(colour="green4"), 
          axis.title.y = element_text(colour="red3")) +
    facet_grid(Treatment_Full~Replicate_Full) 

gate_green = flowCore::polygonGate(filterId="green", `530/30 Blue B-A` = green$`530/30 Blue B-A`, `610/20 YG C-A` = green$`610/20 YG C-A`)
result_green = flowCore::filter(events, gate_green)
events_green = flowCore::Subset(events, result_green)
gate_red = flowCore::polygonGate(filterId="red", `530/30 Blue B-A` = red$`530/30 Blue B-A`, `610/20 YG C-A` = red$`610/20 YG C-A`)
result_red = flowCore::filter(events, gate_red)
events_red = flowCore::Subset(events, result_red)
percent_red = toTable(summary(result_red)) %>% 
    mutate(x = 5e4, y = 3e3, colour="red", order=phenoData(events)$order) %>%
    rename(name=sample) %>%
    left_join(metadata, by="name")
## filter summary for frame 'Specimen_001_A_001_001.fcs'
##  red+: 3732 of 10062 events (37.09%)
## 
## filter summary for frame 'Specimen_001_A_002_002.fcs'
##  red+: 3881 of 10062 events (38.57%)
## 
## filter summary for frame 'Specimen_001_A_003_003.fcs'
##  red+: 3791 of 10062 events (37.68%)
## 
## filter summary for frame 'Specimen_001_A_004_004.fcs'
##  red+: 6588 of 10062 events (65.47%)
## 
## filter summary for frame 'Specimen_001_A_005_005.fcs'
##  red+: 6645 of 10062 events (66.04%)
## 
## filter summary for frame 'Specimen_001_A_006_006.fcs'
##  red+: 6469 of 10062 events (64.29%)
percent_green = toTable(summary(result_green)) %>%
    mutate(x = 5e4, y = 2e3, colour="green", order=phenoData(events)$order) %>%
    rename(name=sample) %>%
    left_join(metadata, by="name")
## filter summary for frame 'Specimen_001_A_001_001.fcs'
##  green+: 6246 of 10062 events (62.08%)
## 
## filter summary for frame 'Specimen_001_A_002_002.fcs'
##  green+: 6114 of 10062 events (60.76%)
## 
## filter summary for frame 'Specimen_001_A_003_003.fcs'
##  green+: 6234 of 10062 events (61.96%)
## 
## filter summary for frame 'Specimen_001_A_004_004.fcs'
##  green+: 3377 of 10062 events (33.56%)
## 
## filter summary for frame 'Specimen_001_A_005_005.fcs'
##  green+: 3331 of 10062 events (33.10%)
## 
## filter summary for frame 'Specimen_001_A_006_006.fcs'
##  green+: 3506 of 10062 events (34.84%)
percentage = bind_rows(percent_red, percent_green) %>%
    group_by(name) %>% 
    mutate(total_gfp_mcherry = sum(true)) %>%
    mutate(percent_fluor = round(true/total_gfp_mcherry, 2) * 100) %>%
    mutate(label = paste0(true, " events (", percent_fluor, "%)")) %>%
    mutate(label2 = paste(true, "events"))
(ggplot() + 
    geom_point(data=events, aes(x=`530/30 Blue B-A`, y=`610/20 YG C-A`), shape=16, size=1, alpha=0.2, colour="black") +
    geom_point(data=events_green, aes(x=`530/30 Blue B-A`, y=`610/20 YG C-A`), shape=16, size=1.25, alpha=0.2, colour="green4") +
    geom_point(data=events_red, aes(x=`530/30 Blue B-A`, y=`610/20 YG C-A`), shape=16, size=1.25, alpha=0.2, colour="red3") +
    geom_text(data=percentage, aes(x=x, y=y, label=label, colour=colour), size=3.5, hjust=1) +
    geom_text(data=percentage, aes(x=3e5, y=3e4, label=paste0(count, " events total")), colour="grey27", size=3.5, hjust=1) +
    scale_colour_manual(values=c("green4", "red3")) +
    ggcyto::scale_x_flowJo_biexp(name="GFP intensity", widthBasis=-500, pos=5, neg=0, limits=c(-500,3e5), breaks=c(0,1e3,1e4,1e5)) +
    ggcyto::scale_y_flowJo_biexp(name="mCherry intensity", widthBasis=-500, pos=5, neg=0, limits=c(-1000,1e5), breaks=c(0,1e3,1e4,1e5)) +
    theme_linedraw(14) +
    theme(panel.grid = element_blank(), 
          axis.title.x = element_text(colour="green4"), 
          axis.title.y = element_text(colour="red3"), 
          legend.position = "none") +
   facet_grid(Treatment_Full~Replicate_Full) 
)

ggsave("figures/GFP_mCherry_8h_scatterplot_replicates.png", width=12, height=6)

Plot intensity distribution for RFP / GFP

ggplot(events_green) +
    geom_bar(stat="bin", aes(x=`530/30 Blue B-A`, y=stat(count)), fill="green4", bins=50, alpha=1, colour="black", size=0.25) +
    geom_vline(xintercept=3e4, linetype="dashed") +
    ggcyto::scale_x_flowJo_biexp(name="GFP intensity", widthBasis=-500, pos=5, neg=0, limits=c(1e3,3e5), breaks=c(0,1e3,1e4,1e5)) +
    scale_y_continuous(name="Count") +
    theme_linedraw(14) +
    theme(panel.grid = element_blank(), 
          axis.title.x = element_text(colour="green4"), 
          legend.position = "none") +
    facet_grid(Treatment_Full~Replicate_Full) 
## Warning: Removed 12 rows containing missing values (geom_bar).

ggsave("figures/distributions_GFP.png", width=12, height=6)
## Warning: Removed 12 rows containing missing values (geom_bar).
ggplot(events_red) +
    geom_bar(stat="bin", aes(x=`610/20 YG C-A`, y=stat(count)), fill="red3", bins=50, alpha=1, colour="black", size=0.25) +
    geom_vline(xintercept=1.15e3, linetype="dashed") +
    ggcyto::scale_x_flowJo_biexp(name="mCherry intensity", widthBasis=-500, pos=5, neg=0, limits=c(0,1e4), breaks=c(0,1e2,1e3,1e4)) +
    scale_y_continuous(name="Count") +
    theme_linedraw(14) +
    theme(panel.grid = element_blank(), 
          axis.title.x = element_text(colour="red3"), 
          legend.position = "none") +
    facet_grid(Treatment_Full~Replicate_Full) 
## Warning: Removed 127 rows containing non-finite values (stat_bin).
## Warning: Removed 12 rows containing missing values (geom_bar).

ggsave("figures/distributions_mCherry.png", width=12, height=6)
## Warning: Removed 127 rows containing non-finite values (stat_bin).

## Warning: Removed 12 rows containing missing values (geom_bar).

Plot representative replicate

samples=c(1,4)
events_rep1 = events[samples]
events_green_rep1 = events_green[samples]
events_red_rep1 = events_red[samples]

phenoData(events_rep1)$Treatment_Full = factor(phenoData(events_rep1)$Treatment_Full, levels=c("Non-Targeting", "GFP-Targeting"))
phenoData(events_green_rep1)$Treatment_Full = factor(phenoData(events_green_rep1)$Treatment_Full, levels=c("Non-Targeting", "GFP-Targeting"))
phenoData(events_red_rep1)$Treatment_Full = factor(phenoData(events_red_rep1)$Treatment_Full, levels=c("Non-Targeting", "GFP-Targeting"))

percentage_rep1 = percentage %>% 
    dplyr::filter(Replicate == 1) %>%
    mutate(Treatment_Full = factor(Treatment_Full, levels=c("Non-Targeting", "GFP-Targeting")))
ggplot() + 
    geom_point(data=events_rep1, aes(x=`530/30 Blue B-A`, y=`610/20 YG C-A`), shape=16, size=0.75, alpha=0.2, colour="black") +
    geom_point(data=events_green_rep1, aes(x=`530/30 Blue B-A`, y=`610/20 YG C-A`), shape=16, size=1, alpha=0.2, colour="green4") +
    geom_point(data=events_red_rep1, aes(x=`530/30 Blue B-A`, y=`610/20 YG C-A`), shape=16, size=1, alpha=0.2, colour="red3") +
    geom_text(data=percentage_rep1, aes(x=x, y=y, label=label, colour=colour), size=4, hjust=1) +
    geom_text(data=percentage_rep1, aes(x=-4e2, y=5e4, label=paste0(count, " events total")), colour="grey27", size=4, hjust=0) +
    scale_colour_manual(values=c("green4", "red3")) +
    ggcyto::scale_x_flowJo_biexp(name="GFP intensity", widthBasis=-500, pos=5, neg=0, limits=c(-500,3e5), breaks=c(0,1e3,1e4,1e5)) +
    ggcyto::scale_y_flowJo_biexp(name="mCherry intensity", widthBasis=-500, pos=5, neg=0, limits=c(-1000,1e5), breaks=c(0,1e3,1e4,1e5)) +
    theme_linedraw(16) +
    theme(panel.grid = element_blank(), 
          axis.title.x = element_text(colour="green4"), 
          axis.title.y = element_text(colour="red3"), 
          legend.position = "none", 
          panel.border = element_rect(size=1.25)) +
   facet_wrap(~Treatment_Full, scales="free_y")

ggsave("figures/2019-11-20_GFP_mCherry_8h_replicate1_scatterplot.pdf", width=12, height=6)
ggsave("figures/2019-11-20_GFP_mCherry_8h_replicate1_scatterplot.png", width=12, height=6, dpi=150)
events_green_rep1_compatible = events_green_rep1[1:length(events_green_rep1)]
events_red_rep1_compatible = events_red_rep1[1:length(events_red_rep1)]

colnames(events_green_rep1_compatible) = c("Intensity", "610/20 YG C-A", "FSC-A", "FSC-H", "FSC-W", "SSC-A", "SSC-H", "SSC-W", "Time")
colnames(events_red_rep1_compatible) = c("530/30 Blue B-A", "Intensity", "FSC-A", "FSC-H", "FSC-W", "SSC-A", "SSC-H", "SSC-W", "Time")
ggplot() +
    geom_bar(stat="bin", data=events_red_rep1_compatible, aes(x=`Intensity`, y=stat(count)), fill="red3", bins=50, alpha=0.75, colour="black", size=0.25) +
    geom_bar(stat="bin", data=events_green_rep1_compatible, aes(x=`Intensity`, y=stat(count)), fill="green4", bins=65, alpha=0.75, colour="black", size=0.25) +
    ggcyto::scale_x_flowJo_biexp(name="Intensity", widthBasis=-100, pos=5, neg=0, limits=c(1e2,3e5), breaks=c(0,1e3,1e4,1e5)) +
    scale_y_continuous(name="Scaled Count\n") +
    theme_linedraw(14) +
    theme(panel.grid = element_blank(),legend.position = "none", 
          panel.border=element_rect(size=1), 
          strip.text=element_blank()) +
    facet_wrap(~Treatment) 
## Warning: Removed 4 rows containing missing values (geom_bar).

## Warning: Removed 4 rows containing missing values (geom_bar).

ggsave("figures/2019-11-20_GFP_mCherry_8h_replicate1_distribution.pdf", width=12, height=3.5)
## Warning: Removed 4 rows containing missing values (geom_bar).

## Warning: Removed 4 rows containing missing values (geom_bar).

Plot unscaled and scaled distributions

#make df for red and green to be plotted together by extracting values from flowset object
datacols = as.character(events_green[[1]]@parameters@data[["name"]])
samplenames = sampleNames(events_green)

df_green = as.data.frame(matrix(nrow=0, ncol=length(datacols)+2))
df_red = as.data.frame(matrix(nrow=0, ncol=length(datacols)+2)) 
colnames(df_green) = c("Sample", "Population", datacols)
colnames(df_red) = c("Sample", "Population", datacols)

#match data types of df_green/red columns to match data_green/red columns for later row binding
datatypes=as_tibble(events_green[[1]]@exprs) %>%
    mutate(Sample = "datatype_char") %>%
    mutate(Population="datatype_char")
common = names(df_green)[names(df_green) %in% names(datatypes)]
df_green[common] = lapply(common, function(x) {match.fun(paste0("as.", class(datatypes[[x]])))(df_green[[x]])})
df_red[common] = lapply(common, function(x) {match.fun(paste0("as.", class(datatypes[[x]])))(df_red[[x]])})
    
for (sample in samplenames) {
    data_green = as.data.frame(events_green[[sample]]@exprs) %>%
    mutate(Sample = sample) %>%
    mutate(Population="green")
    
    df_green = bind_rows(df_green, data_green)
    
    data_red = as.data.frame(events_red[[sample]]@exprs) %>%
    mutate(Sample = sample) %>%
    mutate(Population="red")

    df_red = bind_rows(df_red, data_red)
}
green = df_green %>%
    select(Sample, Population, `530/30 Blue B-A`) %>%
    rename(Intensity = `530/30 Blue B-A`)

red = df_red %>%
    select(Sample, Population, `610/20 YG C-A`) %>%
    rename(Intensity = `610/20 YG C-A`)

df_intensity = bind_rows(green, red) %>%
    left_join(metadata, by=c("Sample"="name")) %>%
    mutate(Treatment=factor(Treatment, levels=c("NT", "GFPT")))
ggplot(df_intensity) +
    geom_bar(stat="bin", aes(x=Intensity, y=stat(count), fill=Population), position="identity", bins=50, alpha=0.75, colour="black", size=0.25) +
    ggcyto::scale_x_flowJo_biexp(name="Intensity", widthBasis=-100, pos=5, neg=0, breaks=c(0,1e3,1e4,1e5)) +
    scale_y_continuous(name="Count\n") +
    scale_fill_manual(values=c("green4","red3")) +
    theme_linedraw(14) +
    theme(panel.grid = element_blank(),legend.position = "none", 
          panel.border=element_rect(size=1), 
          strip.text.x = element_blank()) +
    facet_grid(Replicate_Full~Treatment) 

ggsave("figures/2019-11-20_GFP_mCherry_8h_replicates123_distribution_count.pdf", width=12, height=6)
ggplot(df_intensity) +
   geom_bar(stat="bin", aes(x=Intensity, y=stat(ncount), fill=Population), position="identity", bins=50, alpha=0.75, colour="black", size=0.25) +
    ggcyto::scale_x_flowJo_biexp(name="Intensity", widthBasis=-100, pos=5, neg=0, breaks=c(0,1e3,1e4,1e5)) +
    scale_y_continuous(name="Scaled Count\n", limits=c(0,1.05)) +
    scale_fill_manual(values=c("green4","red3")) +
    theme_linedraw(14) +
    theme(panel.grid = element_blank(),legend.position = "none", 
          panel.border=element_rect(size=1), 
          strip.text.x = element_blank()) +
    facet_grid(Replicate_Full~Treatment) 

ggsave("figures/2019-11-20_GFP_mCherry_8h_replicates123_distribution_scaledcount.pdf", width=12, height=6)

Plot replicate distrubtions combined

ggplot(df_intensity) +
    geom_bar(stat="bin", aes(x=Intensity, y=stat(count), fill=Population, group=interaction(Replicate, Population)), 
             position="identity", bins=50, alpha=0.25, colour="black", size=0.25) +
    ggcyto::scale_x_flowJo_biexp(name="Intensity", widthBasis=-100, pos=5, neg=0, breaks=c(0,1e3,1e4,1e5)) +
    scale_y_continuous(name="Count\n") +
    scale_fill_manual(values=c("green4","red3")) +
    theme_linedraw(14) +
    theme(panel.grid = element_blank(),legend.position = "none", 
          panel.border=element_rect(size=1), 
          strip.text.x = element_blank()) +
    facet_wrap(~Treatment)

ggsave("figures/2019-11-20_GFP_mCherry_8h_replicates_distribution_overlapping_count.pdf", width=12, height=3.5)
#try plotting the mean and sd over top of overlapping distributions
ggplot(df_intensity) +
    geom_bar(stat="bin", aes(x=Intensity, y=stat(count), fill=Population, group=interaction(Replicate, Population)), 
             position="identity", bins=50, alpha=0.25, colour="black", size=0.25) +
    stat_bin(geom="point", aes(x=Intensity, y=stat(count)/3, colour=Population, group=Population), 
             position="identity", bins=50, alpha=1, size=1) +
    ggcyto::scale_x_flowJo_biexp(name="Intensity", widthBasis=-100, pos=5, neg=0, breaks=c(0,1e3,1e4,1e5)) +
    scale_y_continuous(name="Count\n") +
    scale_fill_manual(values=c("green4","red3")) +
    scale_colour_manual(values=c("green4","red3")) +
    theme_linedraw(14) +
    theme(panel.grid = element_blank(),legend.position = "none", 
          panel.border=element_rect(size=1), 
          strip.text.x = element_blank()) +
    facet_wrap(~Treatment)

ggplot(df_intensity) +
    stat_bin(geom="bar", aes(x=Intensity, y=stat(count)/3, fill=Population, group=Population), 
             position="identity", bins=50, alpha=0.75, size=0.35, colour="black") +
    geom_line(stat="bin", aes(x=Intensity, y=stat(count), group=interaction(Replicate, Population)), 
             bins=50, alpha=0.5, colour="black", size=0.15) +
    geom_jitter(stat="bin", aes(x=Intensity, y=stat(count), fill=Population, group=interaction(Replicate, Population)), 
             width=0.005, height=0, bins=50, alpha=0.25, shape=21, colour="black", size=1.5, stroke=0.75) +
    ggcyto::scale_x_flowJo_biexp(name="Intensity", widthBasis=-100, pos=5, neg=0, breaks=c(0,1e3,1e4,1e5)) +
    scale_y_continuous(name="Count", limits=c(0,1100), breaks=seq(0,1000,200)) +
    scale_fill_manual(values=c("green4","red3")) +
    scale_colour_manual(values=c("green4","red3")) +
    theme_linedraw(14) +
    theme(panel.grid = element_blank(),legend.position = "none", 
          panel.border=element_rect(size=1), 
          strip.text.x = element_blank()) +
    facet_wrap(~Treatment, scales="free_y")

ggsave("figures/2019-11-20_GFP_mCherry_8h_replicates_distribution_mean_count.pdf", width=12, height=3.5)
#annotate with median of distributions
average = df_intensity %>%
    group_by(Population, Treatment) %>%
    mutate(median=median(Intensity)) %>%
    select(Treatment, Population, median) %>%
    unique() %>%
    rename(Intensity = median)

numbins=50

ggplot(df_intensity) +
    stat_bin(geom="bar", aes(x=Intensity, y=stat(count)/3, fill=Population, group=Population), 
             position="identity", bins=50, alpha=0.75, size=0.35, colour="black") +
    geom_line(stat="bin", aes(x=Intensity, y=stat(count), group=interaction(Replicate, Population)), 
             bins=50, alpha=0.5, colour="black", size=0.15) +
    geom_jitter(stat="bin", aes(x=Intensity, y=stat(count), fill=Population, group=interaction(Replicate, Population)), 
             width=0.005, height=0, bins=50, alpha=0.25, shape=21, colour="black", size=1.5, stroke=0.75) +
    geom_text(average, mapping=aes(x=Intensity, y=1125, label=round(Intensity), colour=Population)) +
    ggcyto::scale_x_flowJo_biexp(name="Intensity", widthBasis=-100, pos=5, neg=0, breaks=c(0,1e3,1e4,1e5)) +
    scale_y_continuous(name="Count", limits=c(0,1175), breaks=seq(0,1000,200)) +
    scale_fill_manual(values=c("green4","red3")) +
    scale_colour_manual(values=c("green4","red3")) +
    theme_linedraw(14) +
    theme(panel.grid = element_blank(),legend.position = "none", 
          panel.border=element_rect(size=1), 
          strip.text.x = element_blank()) +
    facet_wrap(~Treatment, scales="free_y")

ggsave("figures/2019-11-20_GFP_mCherry_8h_replicates_distribution_mean_count_median_value.pdf", width=12, height=3.5)
#find peaks of distributions
GFPT_green = df_intensity %>%
    dplyr::filter(Population=="green", Treatment=="GFPT") %>%
    pull(Intensity)

m1 = flexmix::FLXMRglm(family = "gaussian")
m2 = flexmix::FLXMRglm(family = "gaussian")
gfptfit = flexmix(GFPT_green ~ 1, data = as.data.frame(GFPT_green), k = 2, model = list(m1, m2))
GFPT_green_peak1 = parameters(gfptfit, component=1)[[1]]
GFPT_green_peak2 = parameters(gfptfit, component=2)[[1]]

NT_green = df_intensity %>%
    dplyr::filter(Population=="green", Treatment=="NT") %>%
    pull(Intensity)

m1 = flexmix::FLXMRglm(family = "gaussian")
ntfit = flexmix(NT_green ~ 1, data = as.data.frame(NT_green), k = 1, model = list(m1))
NT_green_peak1 = parameters(ntfit, component=1)[[1]]

#create compatible df with peak values
peaks = tibble(Intensity = c(GFPT_green_peak1[[1]], GFPT_green_peak2[[1]], NT_green_peak1[[1]]), 
               Treatment = c("GFPT", "GFPT", "NT"), 
               Population = c("green", "green", "green")) %>%
    mutate(Treatment = factor(Treatment, levels=c("NT", "GFPT")))
    
ggplot(df_intensity) +
    stat_bin(geom="bar", aes(x=Intensity, y=stat(count)/3, fill=Population, group=Population), 
             position="identity", bins=50, alpha=0.75, size=0.35, colour="black") +
    geom_line(stat="bin", aes(x=Intensity, y=stat(count), group=interaction(Replicate, Population)), 
             bins=50, alpha=0.5, colour="black", size=0.15) +
    geom_jitter(stat="bin", aes(x=Intensity, y=stat(count), fill=Population, group=interaction(Replicate, Population)), 
             width=0.005, height=0, bins=50, alpha=0.25, shape=21, colour="black", size=1.5, stroke=0.75) +
    geom_text(peaks, mapping=aes(x=Intensity*1.75, y=1125, label=round(Intensity), colour=Population), size=4.5) +
    geom_vline(peaks, mapping=aes(xintercept=Intensity), size=0.5, linetype="dashed") +
    ggcyto::scale_x_flowJo_biexp(name="Intensity", widthBasis=-100, pos=5, neg=0, breaks=c(0,1e3,1e4,1e5)) +
    scale_y_continuous(name="Count", limits=c(0,1200), breaks=seq(0,1200,250)) +
    scale_fill_manual(values=c("green4","red3")) +
    scale_colour_manual(values=c("green4","red3")) +
    theme_linedraw(14) +
    theme(panel.grid = element_blank(),legend.position = "none", 
          panel.border=element_rect(size=1), 
          strip.text.x = element_blank()) +
    facet_wrap(~Treatment, scales="free_y")

ggsave("figures/2019-11-20_GFP_mCherry_8h_replicates_distribution_mean_count_multimodal_peaks.pdf", width=12, height=3.5)
ggplot(df_intensity) +
    stat_bin(geom="bar", aes(x=Intensity, y=stat(ncount), fill=Population, group=Population), 
             position="identity", bins=31, alpha=0.75, size=0.35, colour="black") +
    geom_line(stat="bin", aes(x=Intensity, y=stat(ncount), group=interaction(Replicate, Population)), 
             bins=50, alpha=0.5, colour="black", size=0.15) +
    geom_jitter(stat="bin", aes(x=Intensity, y=stat(ncount), fill=Population, group=interaction(Replicate, Population)), 
             width=0.005, height=0, bins=50, alpha=0.25, shape=21, colour="black", size=1.5, stroke=0.75) +
    ggcyto::scale_x_flowJo_biexp(name="Intensity", widthBasis=-100, pos=5, neg=0, breaks=c(0,1e3,1e4,1e5)) +
    scale_y_continuous(name="Scaled Count") +
    scale_fill_manual(values=c("green4","red3")) +
    scale_colour_manual(values=c("green4","red3")) +
    theme_linedraw(14) +
    theme(panel.grid = element_blank(),legend.position = "none", 
          panel.border=element_rect(size=1), 
          strip.text.x = element_blank()) +
    facet_wrap(~Treatment, scales="free_y")

ggsave("figures/2019-11-20_GFP_mCherry_8h_replicates_distribution_mean_scaledcount.pdf", width=12, height=3.5)

Quantify red / green in NT / GFPT

quantify = percentage %>%
    mutate(population = gsub("green\\+", "GFP+", population)) %>%
    mutate(population = gsub("red\\+", "mCherry+", population)) %>%
    mutate(population = factor(population, levels = c("mCherry+", "GFP+"))) %>%
    mutate(Treatment = factor(Treatment, levels=c("NT", "GFPT")))
set.seed(2331)

ggplot(quantify) +
    geom_bar(stat="summary", aes(x=population, fill=population, y=true/count*100, group=Treatment), 
             fun.y=mean, width=0.65, colour="black", size=1) +
    geom_text(stat="summary", aes(x=population, colour=population, y=true/count*100, group=interaction(Treatment), 
                                  label=paste0(round(..y..),"%")), fun.y=mean, size=5, vjust = -2) +
    geom_jitter(aes(x=population, y=percent), shape=21, size=5, stroke=1.5, position=position_jitter(width = 0.25, height=0), fill="white") +
    scale_fill_manual(values=c("red3", "green4")) +
    scale_colour_manual(values=c("red3", "green4")) +
    scale_y_continuous(name="Percent positive events", limits=c(0,78), expand=c(0,0)) +
    scale_x_discrete(name="") +
    theme_linedraw(18) +
    theme(strip.text.x = element_blank(), legend.position = "none", 
          axis.ticks.x = element_blank(), panel.grid=element_blank(), panel.border=element_rect(size=1.5)) +
    facet_wrap(~Treatment, scales = "free_y")
## Warning: Ignoring unknown parameters: fun.y

## Warning: Ignoring unknown parameters: fun.y
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`

ggsave("figures/2019-11-20_quantify_green_red_barplots.pdf", width=6, height=5)
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`

Log session

sessionInfo()
## R version 3.6.2 (2019-12-12)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Mojave 10.14.6
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] forcats_0.5.1            stringr_1.4.0            dplyr_1.0.5             
##  [4] purrr_0.3.4              readr_1.4.0              tidyr_1.1.3             
##  [7] tibble_3.1.0             tidyverse_1.3.1          flexmix_2.3-17          
## [10] Phenoflow_1.1.2          foreach_1.5.1            flowAI_1.16.0           
## [13] flowFDA_0.99             mclust_5.4.7             multcomp_1.4-16         
## [16] TH.data_1.0-10           MASS_7.3-53.1            survival_3.2-10         
## [19] mvtnorm_1.1-1            flowFP_1.44.0            flowViz_1.50.0          
## [22] lattice_0.20-41          flowClean_1.24.0         ggcyto_1.12.0           
## [25] flowWorkspace_3.32.0     ncdfFlow_2.30.1          BH_1.75.0-0             
## [28] RcppArmadillo_0.10.4.0.0 ggplot2_3.3.3            flowCore_1.52.1         
## 
## loaded via a namespace (and not attached):
##   [1] readxl_1.3.1         changepoint_2.2.2    backports_1.2.1     
##   [4] plyr_1.8.6           igraph_1.2.6         splines_3.6.2       
##   [7] digest_0.6.27        htmltools_0.5.1.1    fansi_0.4.2         
##  [10] magrittr_2.0.1       cluster_2.1.1        sfsmisc_1.1-11      
##  [13] recipes_0.1.15       Biostrings_2.52.0    modelr_0.1.8        
##  [16] gower_0.2.2          RcppParallel_5.1.2   matrixStats_0.58.0  
##  [19] sandwich_3.0-0       prettyunits_1.1.1    jpeg_0.1-8.1        
##  [22] colorspace_2.0-0     rvest_1.0.0          rrcov_1.5-5         
##  [25] haven_2.4.0          xfun_0.22            crayon_1.4.1        
##  [28] jsonlite_1.7.2       hexbin_1.28.2        graph_1.62.0        
##  [31] zoo_1.8-9            iterators_1.0.13     ape_5.4-1           
##  [34] glue_1.4.2           gtable_0.3.0         ipred_0.9-11        
##  [37] zlibbioc_1.30.0      XVector_0.24.0       phyloseq_1.28.0     
##  [40] IDPmisc_1.1.20       Rgraphviz_2.28.0     Rhdf5lib_1.6.3      
##  [43] BiocGenerics_0.32.0  DEoptimR_1.0-8       scales_1.1.1        
##  [46] DBI_1.1.1            Rcpp_1.0.6           progress_1.2.2      
##  [49] bit_4.0.4            stats4_3.6.2         lava_1.6.9          
##  [52] prodlim_2019.11.13   httr_1.4.2           RColorBrewer_1.1-2  
##  [55] ellipsis_0.3.1       modeltools_0.2-23    farver_2.1.0        
##  [58] pkgconfig_2.0.3      nnet_7.3-15          sass_0.3.1          
##  [61] dbplyr_2.1.1         utf8_1.2.1           caret_6.0-86        
##  [64] labeling_0.4.2       tidyselect_1.1.0     rlang_0.4.10        
##  [67] reshape2_1.4.4       cellranger_1.1.0     munsell_0.5.0       
##  [70] tools_3.6.2          cli_2.4.0            generics_0.1.0      
##  [73] ade4_1.7-16          broom_0.7.6          evaluate_0.14       
##  [76] biomformat_1.12.0    yaml_2.2.1           fs_1.5.0            
##  [79] ModelMetrics_1.2.2.2 knitr_1.32           robustbase_0.93-7   
##  [82] nlme_3.1-152         xml2_1.3.2           rstudioapi_0.13     
##  [85] compiler_3.6.2       png_0.1-7            reprex_2.0.0        
##  [88] bslib_0.2.4          pcaPP_1.9-73         stringi_1.5.3       
##  [91] highr_0.8            Matrix_1.3-2         vegan_2.5-7         
##  [94] permute_0.9-5        multtest_2.40.0      vctrs_0.3.7         
##  [97] pillar_1.6.0         lifecycle_1.0.0      jquerylib_0.1.3     
## [100] data.table_1.14.0    R6_2.5.0             latticeExtra_0.6-29 
## [103] KernSmooth_2.23-18   gridExtra_2.3        IRanges_2.18.3      
## [106] codetools_0.2-18     boot_1.3-27          assertthat_0.2.1    
## [109] rhdf5_2.28.1         withr_2.4.1          S4Vectors_0.22.1    
## [112] mgcv_1.8-34          parallel_3.6.2       hms_1.0.0           
## [115] grid_3.6.2           rpart_4.1-15         timeDate_3043.102   
## [118] class_7.3-18         rmarkdown_2.7        pROC_1.17.0.1       
## [121] Biobase_2.46.0       lubridate_1.7.10